In [ ]:
# imports
import os
from lxml import etree
In [ ]:
# set sql output directory
sql_out = r"C:\temp\dtsxsql"
if not os.path.isdir(sql_out):
os.makedirs(sql_out)
In [ ]:
# set dtsx package file
ssis_dtsx = r'C:\temp\dtsx\ParseXML.dtsx'
if not os.path.isfile(ssis_dtsx):
print("no package file")
In [ ]:
# read and parse ssis package
tree = etree.parse(ssis_dtsx)
root = tree.getroot()
root.tag
In [ ]:
# collect unique lxml transformed element tags
ele_tags = set()
for ele in root.xpath(".//*"):
ele_tags.add(ele.tag)
print(ele_tags)
print(len(ele_tags))
In [ ]:
# scan package tree and extract sql source code
total_bytes = 0
package_name = root.attrib['{www.microsoft.com/SqlServer/Dts}ObjectName'].replace(" ","")
for cnt, ele in enumerate(root.xpath(".//*")):
if ele.tag == "{www.microsoft.com/SqlServer/Dts}Executable":
attr = ele.attrib
for child0 in ele:
if child0.tag == "{www.microsoft.com/SqlServer/Dts}ObjectData":
for child1 in child0:
sql_comment = attr["{www.microsoft.com/SqlServer/Dts}ObjectName"].strip()
if child1.tag == "{www.microsoft.com/sqlserver/dts/tasks/sqltask}SqlTaskData":
dtsx_sql = child1.attrib["{www.microsoft.com/sqlserver/dts/tasks/sqltask}SqlStatementSource"]
dtsx_sql = "-- " + sql_comment + "\n" + dtsx_sql
sql_file = sql_out + "\\" + package_name + str(cnt) + ".sql"
total_bytes += len(dtsx_sql)
print((len(dtsx_sql), sql_comment, sql_file))
with open(sql_file, "w") as file:
file.write(dtsx_sql)
print(('total sql bytes',total_bytes))
In [ ]: